### Generate Text in Streaming Mode

```python
import asyncio

from tensorrt_llm import LLM, SamplingParams

# The model argument accepts a Hugging Face model name or a path to a local HF model.
llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
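# Note: constructing the LLM may build or compile an engine under the hood,
# so the first run can take a while.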

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Create the sampling parameters; temperature controls randomness and
# top_p sets the nucleus-sampling threshold.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


# Each task is a coroutine that streams the generation for one prompt.
async def task(prompt_id: int, prompt: str):
    # streaming=True yields partial results as new tokens are produced.
    async for output in llm.generate_async(prompt,
                                           sampling_params,
                                           streaming=True):
        print(f"Generation for prompt-{prompt_id}: {output.outputs[0].text!r}")


async def main():
    # Run all prompts concurrently so their generations interleave.
    tasks = [task(prompt_id, prompt) for prompt_id, prompt in enumerate(prompts)]
    await asyncio.gather(*tasks)


asyncio.run(main())
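# Note: asyncio.run() starts a fresh event loop; in an environment that is
# already running one (e.g. a Jupyter notebook), use `await main()` instead.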

# Example output (the prompts finish at different rates, so lines interleave):
# Generation for prompt-0: '\n'
# Generation for prompt-3: 'an'
# Generation for prompt-2: 'Paris'
# Generation for prompt-1: 'likely'
# Generation for prompt-0: '\n\n'
# Generation for prompt-3: 'an exc'
# Generation for prompt-2: 'Paris.'
# Generation for prompt-1: 'likely to'
# Generation for prompt-0: '\n\nJ'
# Generation for prompt-3: 'an exciting'
# Generation for prompt-2: 'Paris.'
# Generation for prompt-1: 'likely to nomin'
# Generation for prompt-0: '\n\nJane'
# Generation for prompt-3: 'an exciting time'
# Generation for prompt-1: 'likely to nominate'
# Generation for prompt-0: '\n\nJane Smith'
# Generation for prompt-3: 'an exciting time for'
# Generation for prompt-1: 'likely to nominate a'
# Generation for prompt-0: '\n\nJane Smith.'
# Generation for prompt-3: 'an exciting time for us'
# Generation for prompt-1: 'likely to nominate a new'
```
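
Each streamed `output` carries the full text generated so far, which is why the sample output repeats earlier prefixes. If you only want the new suffix on each iteration, a minimal sketch is below; it reuses `llm` and `sampling_params` from the script above, `delta_task` is an illustrative name, and it assumes the accumulated text only ever grows (detokenization can occasionally revise the last few characters at token boundaries).

```python
# A minimal sketch: print only the newly generated suffix per iteration.
# Assumes output.outputs[0].text is cumulative, as the sample output shows.
async def delta_task(prompt_id: int, prompt: str):
    printed = 0  # number of characters already printed
    async for output in llm.generate_async(prompt,
                                           sampling_params,
                                           streaming=True):
        text = output.outputs[0].text
        print(f"prompt-{prompt_id} delta: {text[printed:]!r}")
        printed = len(text)
```

Swapping `delta_task` for `task` inside `main()` would stream only the increments rather than the accumulated text.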
